package controller;

import java.util.ArrayList;

import persist.RefPageDB;

import code.extract.CodeExtractor;
import code.extract.SegmentItem;

/**
 * Manual driver for {@link CodeExtractor}: loads one stored page from the
 * reference-page database, prints it, extracts its code segments and prints
 * each segment's index and content to standard output.
 */
public class TestPageExtractor {
	/** Key of the stored page that is fetched and run through the extractor. */
	private static final String SAMPLE_PAGE_URL = "http://www.kodejava.org/browse/32.html";

	/**
	 * Opens the reference-page database named by the
	 * {@code crawlerconf.jdk_transfer} configuration, looks up
	 * {@link #SAMPLE_PAGE_URL}, and dumps the page plus its extracted code
	 * segments. Failures during lookup or extraction are reported via a stack
	 * trace; the database is closed in every case once it has been opened.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String [] args){
		CrawlerConfig config = new CrawlerConfig("crawlerconf.jdk_transfer");

		RefPageDB refDB = new RefPageDB(config.getRefPageBDBDirname(), 1024*1024, "utf8");
		refDB.open(config.getRefPageBDBDatabasename());

		try {
			String pageContent = refDB.get(SAMPLE_PAGE_URL);
			System.out.println(pageContent);

			// Guard in case the lookup yields no content, rather than handing
			// null to the extractor and failing somewhere inside it.
			if (pageContent == null) {
				System.err.println("No page content stored for " + SAMPLE_PAGE_URL);
				return;
			}

			ArrayList<SegmentItem> segList =
				CodeExtractor.extractCodeSegmentFromPage(pageContent,
						CodeExtractor.CONTINUAL_MERGE|CodeExtractor.SMALL_CODE_MERGE);
			for (int i=0; i<segList.size(); ++i){
				SegmentItem segment = segList.get(i);
				System.out.println(i + "-----------");
				System.out.println(segment.getIndex() + ":" + segment.getContent());
			}
		} catch (Exception e) {
			// Top-level boundary of a manual test tool: report and fall through to cleanup.
			e.printStackTrace();
		} finally {
			// Runs on normal completion, early return, caught exceptions and
			// uncaught Errors alike, so the opened database is always released.
			refDB.close();
		}
	}
}
